#generated with nuc_master
# Load the nucleosome table, then keep only the rows whose cac_c40 and wt_c40
# values are both strictly above the 25th percentile of their own column
# (percentiles are computed over the full, unfiltered table).
nuc_master <- read.csv('~/data/nuc_master.csv')
nuc_master_sub <- subset(
  nuc_master,
  cac_c40 > quantile(nuc_master$cac_c40, probs = 0.25) &
    wt_c40 > quantile(nuc_master$wt_c40, probs = 0.25)
)

# Read a BAM file and return its records as a GRanges object.
#
# Each retained record becomes one range that starts at the record's `pos`
# field and has a width equal to its `isize` field; seqnames come from `rname`
# and strand from `strand`.
#
# Args:
#   bam: path to the BAM file (passed to scanBam() as `file`).
#   bai: path to the BAM index (passed to scanBam() as `index`).
#
# Returns:
#   A GRanges object with one range per retained record.
#
# Records whose `pos` is NA, or whose `isize` is NA or negative, cannot be
# expressed as a start/width range (IRanges() raises an error for them), so
# they are dropped and the number dropped is reported with message().
BAMtoGR <- function(bam,bai){
  library(Rsamtools)

  # Only these four fields are needed to build the ranges.
  what <- c("rname", "pos", "strand", "isize")
  param <- ScanBamParam(what = what)

  # No `which` is given in param, so scanBam() returns a single-element list.
  reads <- scanBam(file = bam,
                   index = bai,
                   param = param)[[1]]

  pos <- reads[["pos"]]
  isize <- reads[["isize"]]

  # Guard against values IRanges() would reject outright.
  keep <- !is.na(pos) & !is.na(isize) & isize >= 0
  n_dropped <- sum(!keep)
  if (n_dropped > 0) {
    message("BAMtoGR: dropped ", n_dropped,
            " record(s) with missing pos or missing/negative isize")
  }

  # Convert the retained records to GRanges.
  GRanges(seqnames = reads[["rname"]][keep],
          ranges = IRanges(start = pos[keep], width = isize[keep]),
          strand = reads[["strand"]][keep])
}

# Lookup table with one row per chromosome: its name (`chr`, character) and
# its end coordinate (`end`, numeric).
chr_coordinates.df <- data.frame(
  chr = c(
    "chrI", "chrII", "chrIII", "chrIV",
    "chrV", "chrVI", "chrVII", "chrVIII",
    "chrIX", "chrX", "chrXI", "chrXII",
    "chrXIII", "chrXIV", "chrXV", "chrXVI"
  ),
  end = c(
    230218, 813184, 316620, 1531933,
    576874, 270161, 1090940, 562643,
    439888, 745751, 666816, 1078177,
    924431, 784333, 1091291, 948066
  ),
  stringsAsFactors = FALSE
)


#nucleosome
# file_name = c('cac_pulse','cac_c10','cac_c15','cac_c20','cac_c40')
# cac_nuc_left_cut.list = list('pulse','c10','c15','c20','c40')
# cac_nuc_right_cut.list = list('pulse','c10','c15','c20','c40')

# Nucleosome-sized fragments, WT samples.
#
# For every BAM file, fragments with 140 < length < 180 whose midpoint lies
# strictly within 70 of a peak in nuc_master_sub are selected. The positions of
# their left ends (range start) and right ends (range end) are tallied in 401
# unit-wide bins spanning peak - 200 .. peak + 200, and the tallies are summed
# over all peaks on all chromosomes. The result is one length-401 profile per
# file, stored in wt_nuc_left_cut.list[[j]] and wt_nuc_right_cut.list[[j]].
file_name <- c('wt_pulse','wt_c10','wt_c15','wt_c20','wt_c40')
# The label strings are placeholders; each slot is overwritten with a numeric
# profile at the end of the corresponding loop iteration.
wt_nuc_left_cut.list <- list('pulse','c10','c15','c20','c40')
wt_nuc_right_cut.list <- list('pulse','c10','c15','c20','c40')

for (j in seq_along(file_name)) {
  file_name.bam <- paste0("~/data/bam_bai/", file_name[j], ".bam")
  file_name.bam.bai <- paste0("~/data/bam_bai/", file_name[j], ".bam.bai")
  gr <- BAMtoGR(file_name.bam, file_name.bam.bai)
  cat(paste('Finished converting to GR','\n'))

  # One row per fragment: chromosome, midpoint, both end positions and length.
  mid.df_all <- data.frame(chr = seqnames(gr),
                           mid = start(gr) + (width(gr) / 2),
                           left_cut = c(start(gr)),
                           right_cut = c(end(gr)),
                           length = width(gr))

  # Running totals over all peaks of this file (401 bins: peak-200 .. peak+200).
  left_cut <- rep(0, 401)
  right_cut <- rep(0, 401)

  for (k in seq_len(nrow(chr_coordinates.df))) {
    mid.df <- subset(mid.df_all,
                     length > 140 & length < 180 & chr == chr_coordinates.df$chr[k])
    nuc_sub <- subset(nuc_master_sub, chr == chr_coordinates.df$chr[k])

    # seq_len() skips chromosomes with no selected nucleosomes; 1:nrow() would
    # iterate over c(1, 0) there and fail on an NA peak.
    for (i in seq_len(nrow(nuc_sub))) {
      peak <- floor(nuc_sub$peak[i])
      # Bin edges shared by both histograms; right = FALSE makes bins [a, b).
      cut_breaks <- seq(peak - 200, peak + 200 + 1, 1)

      mid.df_sub <- subset(mid.df, mid > (peak - 70) & mid < (peak + 70))
      left_cut <- left_cut +
        hist(mid.df_sub$left_cut, cut_breaks, plot = FALSE, right = FALSE)$counts
      right_cut <- right_cut +
        hist(mid.df_sub$right_cut, cut_breaks, plot = FALSE, right = FALSE)$counts

      # Progress indicator; the counter restarts on every chromosome.
      if ((i %% 500) == 0) {
        cat(paste("nucleosome #",i,"\n"))
      }
    }

  }

  cat(paste("Finishing file",j,"\n"))
  wt_nuc_left_cut.list[[j]] <- left_cut
  wt_nuc_right_cut.list[[j]] <- right_cut

}
#repeat for cac

#saveRDS(wt_nuc_right_cut.list,file='~/data/wt_nucMix_right_cut.RDS')
#saveRDS(wt_nuc_left_cut.list,file='~/data/wt_nucMix_left_cut.RDS')


#small fragment mix
#file_name = c('cac_pulse','cac_c10','cac_c15','cac_c20','cac_c40')
# cac_small_left_cut.list = list('pulse','c10','c15','c20','c40')
# cac_small_right_cut.list = list('pulse','c10','c15','c20','c40')

# Small fragments (both sides of the peak pooled), WT samples.
#
# For every BAM file, fragments with 20 < length < 80 whose midpoint lies
# strictly within 70 of a peak in nuc_master_sub are selected. The positions of
# their left ends (range start) and right ends (range end) are tallied in 401
# unit-wide bins spanning peak - 200 .. peak + 200, and the tallies are summed
# over all peaks on all chromosomes. The result is one length-401 profile per
# file, stored in wt_small_left_cut.list[[j]] and wt_small_right_cut.list[[j]].
file_name <- c('wt_pulse','wt_c10','wt_c15','wt_c20','wt_c40')
# The label strings are placeholders; each slot is overwritten with a numeric
# profile at the end of the corresponding loop iteration.
wt_small_left_cut.list <- list('pulse','c10','c15','c20','c40')
wt_small_right_cut.list <- list('pulse','c10','c15','c20','c40')

for (j in seq_along(file_name)) {
  file_name.bam <- paste0("~/data/bam_bai/", file_name[j], ".bam")
  file_name.bam.bai <- paste0("~/data/bam_bai/", file_name[j], ".bam.bai")
  gr <- BAMtoGR(file_name.bam, file_name.bam.bai)
  cat(paste('Finished converting to GR','\n'))

  # One row per fragment: chromosome, midpoint, both end positions and length.
  mid.df_all <- data.frame(chr = seqnames(gr),
                           mid = start(gr) + (width(gr) / 2),
                           left_cut = c(start(gr)),
                           right_cut = c(end(gr)),
                           length = width(gr))

  # Running totals over all peaks of this file (401 bins: peak-200 .. peak+200).
  left_cut <- rep(0, 401)
  right_cut <- rep(0, 401)

  for (k in seq_len(nrow(chr_coordinates.df))) {
    mid.df <- subset(mid.df_all,
                     length > 20 & length < 80 & chr == chr_coordinates.df$chr[k])
    nuc_sub <- subset(nuc_master_sub, chr == chr_coordinates.df$chr[k])

    # seq_len() skips chromosomes with no selected nucleosomes; 1:nrow() would
    # iterate over c(1, 0) there and fail on an NA peak.
    for (i in seq_len(nrow(nuc_sub))) {
      peak <- floor(nuc_sub$peak[i])
      # Bin edges shared by both histograms; right = FALSE makes bins [a, b).
      cut_breaks <- seq(peak - 200, peak + 200 + 1, 1)

      mid.df_sub <- subset(mid.df, mid > (peak - 70) & mid < (peak + 70))
      left_cut <- left_cut +
        hist(mid.df_sub$left_cut, cut_breaks, plot = FALSE, right = FALSE)$counts
      right_cut <- right_cut +
        hist(mid.df_sub$right_cut, cut_breaks, plot = FALSE, right = FALSE)$counts

      # Progress indicator; the counter restarts on every chromosome.
      if ((i %% 500) == 0) {
        cat(paste("nucleosome #",i,"\n"))
      }
    }

  }

  cat(paste("Finishing file",j,"\n"))
  wt_small_left_cut.list[[j]] <- left_cut
  wt_small_right_cut.list[[j]] <- right_cut

}
#repeat for cac

#saveRDS(wt_small_right_cut.list,file='~/data/wt_smallMix_right_cut.RDS')
#saveRDS(wt_small_left_cut.list,file='~/data/wt_smallMix_left_cut.RDS')



#small fragment left right separate 
# Small fragments split by side of the peak, CAC samples.
#
# For every BAM file, fragments with 20 < length < 80 are divided by where
# their midpoint falls relative to each peak in nuc_master_sub:
#   "right" reads: peak      <= mid <= peak + 70
#   "left"  reads: peak - 70 <= mid <= peak
# Both bounds are inclusive, so a fragment whose midpoint equals the peak is
# counted in both groups. For each group the left-end (range start) and
# right-end (range end) positions are tallied in 401 unit-wide bins spanning
# peak - 200 .. peak + 200 and summed over all peaks on all chromosomes.
file_name <- c('cac_pulse','cac_c10','cac_c15','cac_c20','cac_c40')
# Four named lists, one length-401 profile per file in each. The label strings
# are placeholders that are overwritten at the end of each loop iteration.
cac_small_cut.list <- list(right_read_left_cut = list('pulse','c10','c15','c20','c40'),
                           right_read_right_cut = list('pulse','c10','c15','c20','c40'),
                           left_read_left_cut = list('pulse','c10','c15','c20','c40'),
                           left_read_right_cut = list('pulse','c10','c15','c20','c40'))


for (j in seq_along(file_name)) {
  file_name.bam <- paste0("~/data/bam_bai/", file_name[j], ".bam")
  file_name.bam.bai <- paste0("~/data/bam_bai/", file_name[j], ".bam.bai")
  gr <- BAMtoGR(file_name.bam, file_name.bam.bai)
  cat(paste('Finished converting to GR','\n'))

  # One row per fragment: chromosome, midpoint, both end positions and length.
  mid.df_all <- data.frame(chr = seqnames(gr),
                           mid = start(gr) + (width(gr) / 2),
                           left_cut = c(start(gr)),
                           right_cut = c(end(gr)),
                           length = width(gr))

  # The length filter is the same for every chromosome, so apply it once here.
  mid.df_all <- subset(mid.df_all, length > 20 & length < 80)

  # Running totals: <side of peak>_counts.<fragment end>.
  left_counts.l <- rep(0, 401)
  left_counts.r <- rep(0, 401)

  right_counts.l <- rep(0, 401)
  right_counts.r <- rep(0, 401)

  for (k in seq_len(nrow(chr_coordinates.df))) {
    mid.df <- subset(mid.df_all, chr == chr_coordinates.df$chr[k])
    nuc_sub <- subset(nuc_master_sub, chr == chr_coordinates.df$chr[k])

    # seq_len() skips chromosomes with no selected nucleosomes; 1:nrow() would
    # iterate over c(1, 0) there and fail on an NA peak.
    for (i in seq_len(nrow(nuc_sub))) {
      peak <- floor(nuc_sub$peak[i])
      # Bin edges shared by all four histograms; right = FALSE makes bins [a, b).
      cut_breaks <- seq(peak - 200, peak + 200 + 1, 1)

      mid_small_right <- subset(mid.df, mid >= (peak) & mid <= (peak + 70))
      mid_small_left <- subset(mid.df, mid >= (peak - 70) & mid <= (peak))

      left_counts.l <- left_counts.l +
        hist(mid_small_left$left_cut, cut_breaks, plot = FALSE, right = FALSE)$counts
      left_counts.r <- left_counts.r +
        hist(mid_small_left$right_cut, cut_breaks, plot = FALSE, right = FALSE)$counts

      right_counts.l <- right_counts.l +
        hist(mid_small_right$left_cut, cut_breaks, plot = FALSE, right = FALSE)$counts
      right_counts.r <- right_counts.r +
        hist(mid_small_right$right_cut, cut_breaks, plot = FALSE, right = FALSE)$counts

      # Progress indicator; the counter restarts on every chromosome.
      if ((i %% 500) == 0) {
        cat(paste("nucleosome #",i,"\n"))
      }
    }
  }
  cat(paste("Finishing file",j,"\n"))
  # Slots 1-4 are right_read_left_cut, right_read_right_cut,
  # left_read_left_cut and left_read_right_cut, in that order.
  cac_small_cut.list[[1]][[j]] <- right_counts.l
  cac_small_cut.list[[2]][[j]] <- right_counts.r

  cac_small_cut.list[[3]][[j]] <- left_counts.l
  cac_small_cut.list[[4]][[j]] <- left_counts.r
}


#saveRDS(cac_small_cut.list,file='~/data/cac_small_cut.RDS')
